Disco Diffusion
Contents
Disco Diffusion#
This is a simple way of creating compelling Disco Diffusion artworks for generative artists.
References:#
Note
Install ekorpkit package first.
Set the logging level to WARNING if you don’t want to see verbose logging.
If you run this notebook in Colab, set Hardware accelerator to GPU.
# Install ekorpkit with the `disco` extras.
# NOTE(review): `exit()` appears fused onto the pip line by the notebook
# export — it is likely a separate statement that restarts the runtime so the
# freshly installed packages are picked up; confirm against the original cell.
!pip install ekorpkit[disco] exit()
from ekorpkit import eKonf

# Quiet the logs and pin computation to CUDA device 4.
eKonf.setLogger("WARNING")
eKonf.set_cuda(device=4)
print("version:", eKonf.__version__)

# Detect the execution environment (plain notebook vs. Google Colab).
is_notebook = eKonf.is_notebook()
is_colab = eKonf.is_colab()
print("is notebook?", is_notebook)
print("is colab?", is_colab)
if is_colab:
    # On Colab, mount Google Drive so workspace/project outputs persist
    # across sessions.
    eKonf.mount_google_drive(
        workspace="MyDrive/colab_workspace", project="disco-imagen"
    )
print("environment variables:")
eKonf.print(eKonf.env().dict())
INFO:ekorpkit.base:Setting cuda device to ['A100-SXM4-40GB (id:4)']
INFO:ekorpkit.base:Google Colab not detected.
version: 0.1.38+8.g318b9fe.dirty
is notebook? True
is colab? False
environment variables:
{'CUDA_DEVICE_ORDER': 'PCI_BUS_ID',
'CUDA_VISIBLE_DEVICES': '4',
'EKORPKIT_CONFIG_DIR': '/workspace/projects/ekorpkit-book/config',
'EKORPKIT_DATA_DIR': None,
'EKORPKIT_LOG_LEVEL': 'WARNING',
'EKORPKIT_PROJECT': 'ekorpkit-book',
'EKORPKIT_WORKSPACE_ROOT': '/workspace',
'KMP_DUPLICATE_LIB_OK': 'TRUE',
'NUM_WORKERS': 230}
self-upgrade of ekorpkit#
# eKonf.upgrade(prelease=True, quiet=False, verbose=True, force_reinstall=False)
Create a disco instance#
# Compose the `model/disco` configuration and instantiate the Disco Diffusion
# model object used by all subsequent `disco.*` calls.
cfg = eKonf.compose("model/disco")
disco = eKonf.instantiate(cfg)
INFO:ekorpkit.base:Loaded .env from /workspace/projects/ekorpkit-book/config/.env
INFO:ekorpkit.base:setting environment variable CACHED_PATH_CACHE_ROOT to /workspace/.cache/cached_path
INFO:ekorpkit.base:setting environment variable KMP_DUPLICATE_LIB_OK to TRUE
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth
Imagine#
# Prompt/batch-name pairs kept for experimentation; exactly one pair should be
# active at a time.  The "dovish" pair below was previously live but its
# assignments were dead stores (immediately overwritten by the final pair), so
# it is commented out like the other alternatives.
# text_prompts = "Dovish members of the Federal Reserve Board are to cut the target interest rates. matte style, artstation"
# batch_name = "dovish"
# text_prompts = 'At a special meeting, hawkish central bankers are poised to raise the target rates. Trending on artstation, matte style'
# batch_name = "hawkish"
# text_prompts = "Mt. Halla's beautiful flowers, photorealistic"
# batch_name = "halla"
# text_prompts = "Brave and beautiful new world in Jeju, matte style, artstaion"
text_prompts = "Brave and beautiful new world in Jeju, artstation matte painting."
batch_name = "newworld"
disco.parameters()
{'advanced': {'clamp_grad': "true {type: 'boolean'}",
'clamp_max': "0.05 {type: 'number'}",
'eta': "0.8 {type: 'number'}",
'set_seed': "random_seed {type: 'string'}"},
'advanced_extra': {'clip_denoised': 'false',
'fuzzy_prompt': 'false',
'rand_mag': '0.05',
'randomize_class': 'true'},
'advanced_perlin_init': {'perlin_init': "false {type: 'boolean'} : Perlin "
'init will replace your init',
'perlin_mode': "mixed ['mixed', 'color', 'gray']"},
'animation_2d': {'angle': "'0:(0)' {type: 'string'} : All rotations are "
'provided in degrees.',
'far_plane': "10000 {type: 'number'}",
'fov': "40 {type: 'number'}",
'interp_spline': 'Linear '
"['Linear','Quadratic','Cubic']{type: "
"'string'} : Do not change, currently will "
'not look good.',
'key_frames': "true {type: 'boolean'}",
'max_frames': "10000 {type: 'number'}",
'midas_depth_model': "dpt_large {type: 'string'}",
'midas_weight': "0.3 {type: 'number'}",
'near_plane': "200 {type: 'number'}",
'padding_mode': "border {type: 'string'}",
'rotation_3d_x': "'0: (0)' {type: 'string'}",
'rotation_3d_y': "'0: (0)' {type: 'string'}",
'rotation_3d_z': "'0: (0)' {type: 'string'}",
'sampling_mode': "bicubic {type: 'string'}",
'translation_x': "'0: (0)' {type: 'string'}",
'translation_y': "'0: (0)' {type: 'string'}",
'translation_z': "'0: (10.0)' {type: 'string'}",
'zoom': "'0: (1), 10: (1.05)' {type: 'string'} : # `zoom` is "
'a multiplier of dimensions, 1 is no zoom.'},
'animation_3d_turbo_mode': {'turbo_mode': "false {type: 'boolean'} : (Starts "
'after frame 10,) skips diffusion '
'steps and just uses depth map to '
'warp images for skipped frames. '
'Speeds up rendering by 2x-4x, and '
'may improve image coherence '
'between frames. For different '
'settings tuned for Turbo Mode, '
'refer to the original Disco-Turbo '
'Github: '
'https://github.com/zippy731/disco-diffusion-turbo',
'turbo_preroll': '10 # frames',
'turbo_steps': '"3" ["2","3","4","5","6"] {type: '
'"string"}'},
'animation_3d_vr_mode': {'vr_eye_angle': "0.5 {type: 'number'} : "
'`vr_eye_angle` is the y-axis '
'rotation of the eyes towards the '
'center',
'vr_ipd': "5.0 {type: 'number'} : interpupillary "
'distance (between the eyes)',
'vr_mode': "false {type: 'boolean'} : Enables stereo "
'rendering of left/right eye views '
'(supporting Turbo) which use a different '
'(fish-eye) camera projection matrix. '
"Note the images you're prompting will "
'work better if they have some inherent '
'wide-angle aspect. The generated images '
'will need to be combined into left/right '
'videos. These can then be stitched into '
'the VR180 format. Google made the VR180 '
'Creator tool but subsequently stopped '
"supporting it. It's available for "
'download in a few places including '
'https://www.patrickgrunwald.de/vr180-creator-download. '
'The tool is not only good for stitching '
'(videos and photos) but also for adding '
'the correct metadata into existing '
'videos, which is needed for services '
'like YouTube to identify the format '
'correctly. Watching YouTube VR videos '
"isn't necessarily the easiest depending "
'on your headset. For instance Oculus '
'have a dedicated media studio and store '
'which makes the files easier to access '
'on a Quest '
'https://creator.oculus.com/manage/mediastudio/. '
'The command to get ffmpeg to concat your '
'frames for each eye is in the form: '
'`ffmpeg -framerate 15 -i frame_%4d_l.png '
'l.mp4` (repeat for r)'},
'basic': {'clip_guidance_scale': "5000 {type: 'number'}",
'cutn_batches': "4 {type: 'number'}",
'range_scale': "150 {type: 'number'}",
'sat_scale': "0 {type: 'number'}",
'skip_augs': "false {type: 'boolean'}",
'steps': "250 [25,50,100,150,250,500,1000] {type: 'raw', "
'allow-input: true}',
'tv_scale': "0 {type: 'number'}",
'width_height': "[1280, 768] {type: 'raw'}"},
'coherency': {'frames_scale': "1500 {type: 'integer'} : `frame_scale` tries "
'to guide the new frame to looking like the old '
'one. A good default is 1500.',
'frames_skip_steps': "'60%' ['40%', '50%', '60%', '70%', '80%'] "
"{type: 'string'} : `frame_skip_steps` "
'will blur the previous frame - higher '
'values will flicker less but struggle to '
'add enough new detail to zoom into.'},
'cutn_scheduling': {'cut_ic_pow': "1 {type: 'number'}",
'cut_icgray_p': "'[0.2]*400+[0]*600' {type: 'string'}",
'cut_innercut': "'[4]*400+[12]*600' {type: 'string'}",
'cut_overview': "'[12]*400+[4]*600' {type: 'string'} \n"
'>> Format: `[40]*400+[20]*600` = 40 cuts '
'for the first 400 /1000 steps, then 20 '
'for the last 600/1000. cut_overview and '
'cut_innercut are cumulative for total '
'cutn on any given step. Overview cuts '
'see the entire image and are good for '
'early structure, innercuts are your '
'standard cutn.'},
'init_image': {'init_image': "None {type: 'string'}",
'init_scale': "1000 {type: 'integer'}",
'skip_steps': "10 {type: 'integer'} *Make sure you set "
'skip_steps to ~50% of your steps if you want to '
'use an init image.*'},
'run': {'animation_mode': "None ['None', '2D', '3D', 'Video Input'] "
"{type:'string'}",
'batch_name': "TimeToDisco {type: 'string'}",
'batch_size': '1',
'diffusion_sampling_mode': "ddim ['plms','ddim']",
'display_rate': "20 {type: 'number'}",
'n_samples': "6 {type: 'number'}",
'resume_from_frame': 'latest',
'resume_run': 'false',
'retain_overwritten_frames': 'true',
'run_to_resume': 'latest',
'show_collage': 'true',
'use_secondary_model': 'true'},
'saving': {'intermediate_saves': "0 {type: 'raw'} : Intermediate steps will "
'save a copy at your specified intervals. '
'You can either format it as a single '
'integer or a list of specific steps. A '
'value of `2` will save a copy at 33% and '
'66%. 0 will save none. A value of `[5, 9, '
'34, 45]` will save at steps 5, 9, 34, and '
'45. (Make sure to include the brackets)',
'intermediates_in_subfolder': "true {type: 'boolean'}",
'steps_per_checkpoint': None},
'transformation': {'transformation_percent': '[0.09]',
'use_horizontal_symmetry': "false {type: 'boolean'}",
'use_vertical_symmetry': "false {type: 'boolean'}"},
'video_init': {'video_init_clip_guidance_scale': "1000 {type: 'number'}",
'video_init_cutn_batches': "4 {type: 'number'}",
'video_init_range_scale': "150 {type: 'number'}",
'video_init_sat_scale': "300 {type: 'number'}",
'video_init_skip_steps': "50 {type: 'integer'}",
'video_init_steps': '100 [25,50,100,150,250,500,1000]{type: '
"'raw', allow-input: true}",
'video_init_tv_scale': "0.1 {type: 'number'}"},
'video_init_coherency': {'force_flow_generation': "false {type:'boolean'}",
'video_init_frames_scale': "15000 {type: 'integer'} "
': `frame_scale` tries to '
'guide the new frame to '
'looking like the old '
'one. A good default is '
'1500.',
'video_init_frames_skip_steps': "'70%' ['40%', "
"'50%', '60%', "
"'70%', '80%'] "
"{type: 'string'} : "
'`frame_skip_steps` '
'will blur the '
'previous frame - '
'higher values will '
'flicker less but '
'struggle to add '
'enough new detail '
'to zoom into.'},
'video_init_flow': {'video_init_blend_mode': "optical flow ['None', 'linear', "
"'optical flow'] : Call optical "
'flow from video frames and warp '
'prev frame with flow',
'video_init_check_consistency': 'false',
'video_init_flow_blend': "0.999 {type: 'number'} : 0 - "
'take next frame, 1 - take prev '
'warped frame',
'video_init_flow_warp': "true {type: 'boolean'} : Call "
'optical flow from video frames '
'and warp prev frame with flow'},
'video_init_input': {'extract_nth_frame': "2 {type: 'number'}",
'persistent_frame_output_in_batch_folder': 'true {type: '
"'boolean'}",
'video_init_file': "init.mp4 {type: 'string'}",
'video_init_path': '{..path.init_dir}/{.video_init_file}',
'video_init_seed_continuity': "false {type: 'boolean'}"},
'video_output': {'blend': '0.5',
'fps': '12',
'init_frame': "1 {type: 'number'} : This is the frame where "
'the video will start',
'last_frame': "final_frame {type: 'number'} \n"
'>> You can change i to the number of the last '
'frame you want to generate. \n'
'It will raise an error if that number of '
'frames does not exist.',
'skip_video_for_run_all': 'false',
'video_init_check_consistency': 'false',
'view_video_in_cell': 'false'}}
disco.parameters("animation")
[animation_mode]
None ['None', '2D', '3D', 'Video Input'] {type:'string'}
Image only mode#
# Still-image generation (no animation): two 768x512 samples via DDIM
# sampling, with CLIP guidance raised above the 5000 default to 7500.
disco.imagine(
    text_prompts,
    batch_name=batch_name,
    n_samples=2,
    show_collage=True,
    width_height = [768, 512],
    # perlin_init=True,
    diffusion_sampling_mode='ddim',
    clip_guidance_scale=7500,
)
WARNING:ekorpkit.visualize.collage:no images found
2 samples generated to /workspace/projects/ekorpkit-book/disco-imagen/outputs/disco-diffusion/newworld
text prompts: Brave and beautiful new world in Jeju, artstation matte painting.
sample image paths:
3D animation mode#
# Animated prompt schedule: each key is the frame number at which that prompt
# takes over, so the scene changes roughly every 200 frames.
text_prompts = {
    0: "A beautiful new world in Jeju, artstation matte painting.",
    100: "Beautiful mountains in Jeju, artstation matte painting.",
    300: "A beautiful sea in Jeju, artstation matte painting.",
    500: "A beautiful woman in Jeju, artstation matte painting.",
    700: "A beautiful wind in Jeju, artstation matte painting.",
    900: "A beautiful new world in Jeju, artstation matte painting.",
}
batch_name = "newworld-3d"
# Resume 3D-animation run #11: 300 diffusion steps per frame, up to 5000
# frames, with a keyframed z-rotation that reverses direction at frame 600.
disco.imagine(
    text_prompts,
    batch_name=batch_name,
    batch_num=11,
    resume_run=True,
    steps=300,
    animation_mode="3D",
    rotation_3d_z="0: (0), 500: (1), 600: (-1)",
    # perlin_init=True,
    # perlin_mode="color",
    diffusion_sampling_mode='ddim',
    max_frames = 5000,
    width_height = [768, 512],
    clip_guidance_scale=7500,
)
INFO:ekorpkit.models.disco.base:translation_x: 0.0
INFO:ekorpkit.models.disco.base:translation_y: 0.0
INFO:ekorpkit.models.disco.base:translation_z: 10.0
INFO:ekorpkit.models.disco.base:rotation_3d_x: 0.0
INFO:ekorpkit.models.disco.base:rotation_3d_y: 0.0
INFO:ekorpkit.models.disco.base:rotation_3d_z: -0.6799999999999999
INFO:ekorpkit.models.disco.base:translation: [-0.0, 0.0, -0.05]
INFO:ekorpkit.models.disco.base:rotation: [0.0, 0.0, -0.6799999999999999]
INFO:ekorpkit.models.disco.base:rot_mat: tensor([[[ 1.0000, 0.0119, 0.0000],
[-0.0119, 1.0000, 0.0000],
[ 0.0000, 0.0000, 1.0000]]], device='cuda:4')
INFO:ekorpkit.models.disco.disco_xform_utils:Running AdaBins depth estimation implementation...
Loading base model ()...
Using cache found in /root/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master
Done.
Removing last two layers (global_pool & classifier).
Building Encoder-Decoder model..Done.
INFO:ekorpkit.models.disco.disco_xform_utils:Running MiDaS depth estimation implementation...
2D animation mode#
# 2D animation mode: a single static prompt over 50 frames, 300 steps each.
text_prompts = "Brave and beautiful new world in Jeju, artstation matte painting."
batch_name = "newworld-2d"
disco.imagine(
    text_prompts,
    batch_name=batch_name,
    steps=300,
    animation_mode = '2D',
    # rotation_3d_z="0: (1)",
    # perlin_init=True,
    # perlin_mode="color",
    diffusion_sampling_mode='ddim',
    max_frames = 50,
    width_height = [768, 512],
    clip_guidance_scale=7500,
)
Video Input Mode#
# Video-input mode: restyle an existing clip frame by frame.  The init video
# here is the mp4 produced by the earlier "newworld-3d" run.
text_prompts = "A beautiful new world in Jeju, artstation matte painting."
batch_name = "newworld-video"
disco.imagine(
    text_prompts,
    batch_name=batch_name,
    steps=250,
    animation_mode="Video Input",
    video_init_path="/workspace/projects/ekorpkit-book/disco-imagen/outputs/disco-diffusion/newworld-3d/newworld-3d(9).mp4",
    # rotation_3d_z="0: (1)",
    # perlin_init=True,
    # perlin_mode="color",
    diffusion_sampling_mode='ddim',
    max_frames = 10,
    width_height = [768, 512],
    clip_guidance_scale=7500,
)
INFO:ekorpkit.models.disco.base:warping frames with flow blend ratio: 0.999
INFO:ekorpkit.models.disco.base:Image prompt: []
INFO:ekorpkit.models.disco.base:Frame 4 Prompt: ['A beautiful new world in Jeju, artstation matte painting.']
INFO:ekorpkit.models.disco.base:Seed used: 3402861987
INFO:ekorpkit.models.disco.base: >> elapsed time to diffuse: 0:02:33.218085
INFO:ekorpkit.models.art.base:Saving config to /workspace/projects/ekorpkit-book/disco-imagen/outputs/disco-diffusion/newworld-video/newworld-video(0)_settings.yaml
Create a video from output frames#
# Stitch the output frames of a finished run into an mp4, then embed the
# resulting video in the notebook.
batch_name = "newworld-video"
batch_num = 0
mp4_path = disco.create_video(
    batch_name=batch_name,
    batch_num=batch_num,
)
from IPython.display import Video
Video(mp4_path, embed=True)
collage generated sample images#
# Arrange four generated samples of a batch in a 2-column collage, labelling
# each image with its filename.
disco.collage(
    batch_name=batch_name,
    batch_num=4,
    ncols=2,
    num_images=4,
    show_filename=True,
    fontcolor="black",
)
INFO:ekorpkit.models.dalle.base:Loading config from /workspace/projects/ekorpkit-book/disco-imagen/outputs/disco-diffusion/halla/halla(4)_settings.yaml
INFO:ekorpkit.models.dalle.base:Merging config with diffuse defaults
INFO:ekorpkit.models.disco.base:Will save every 2 steps
INFO:ekorpkit.models.disco.base:Using seed: 221812581
INFO:ekorpkit.models.dalle.base:Prompt: Mt. Halla's beautiful flowers, artstation matte painting
INFO:ekorpkit.io.file:Processing [4] files from ['halla(4)_*.png']
make gif of progresses of a specific sample image#
# Build a GIF from the intermediate saves of one sample, showing how the image
# evolves over the diffusion steps.
disco.make_gif(
    batch_name='halla',
    batch_num=1,
    sample_num=1,
    show=False,
    duration=100,  # presumably per-frame duration in ms — TODO confirm
    force_remake=True,
)
Saved GIF to /workspace/projects/ekorpkit-book/disco-imagen/outputs/halla/halla(1)_0001.gif
show config#
disco.show_config(batch_name=batch_name, batch_num=4)
INFO:ekorpkit.models.disco.base:Loading config from /workspace/projects/ekorpkit-book/disco-imagen/outputs/halla/halla(4)_settings.yaml
INFO:ekorpkit.models.disco.base:Merging config with diffuse defaults
INFO:ekorpkit.models.disco.base:Merging config with args: {}
INFO:ekorpkit.models.disco.base:Will save every 2 steps
INFO:ekorpkit.models.disco.base:Using seed: 3370684779
{'angle': '0:(0)',
'animation_mode': 'None',
'batch_name': 'halla',
'batch_num': 7,
'batch_size': 1,
'calc_frames_skip_steps': 150,
'clamp_grad': True,
'clamp_max': 0.05,
'clip_denoised': False,
'clip_guidance_scale': 5000,
'cut_ic_pow': 1,
'cut_icgray_p': '[0.2]*400+[0]*600',
'cut_innercut': '[4]*400+[12]*600',
'cut_overview': '[12]*400+[4]*600',
'cutn_batches': 4,
'display_rate': 20,
'eta': 0.8,
'extract_nth_frame': 2,
'far_plane': 10000,
'force_flow_generation': False,
'fov': 40,
'frames_scale': 1500,
'frames_skip_steps': '60%',
'fuzzy_prompt': False,
'image_prompts': None,
'init_image': 'None',
'init_scale': 1000,
'intermediate_saves': 250,
'intermediates_in_subfolder': True,
'interp_spline': 'Linear',
'key_frames': True,
'max_frames': 1,
'midas_depth_model': 'dpt_large',
'midas_weight': 0.3,
'n_samples': 4,
'near_plane': 200,
'padding_mode': 'border',
'perlin_init': False,
'perlin_mode': 'mixed',
'persistent_frame_output_in_batch_folder': True,
'rand_mag': 0.05,
'randomize_class': True,
'range_scale': 150,
'resume_from_frame': 'latest',
'resume_run': False,
'retain_overwritten_frames': False,
'rotation_3d_x': '0: (0)',
'rotation_3d_y': '0: (0)',
'rotation_3d_z': '0: (0)',
'run_to_resume': 'latest',
'sampling_mode': 'bicubic',
'sat_scale': 0,
'seed': 3370684779,
'set_seed': 'random_seed',
'show_collage': True,
'side_x': 768,
'side_y': 512,
'skip_augs': False,
'skip_steps': 10,
'start_frame': 0,
'steps': 250,
'steps_per_checkpoint': 2,
'stop_on_next_loop': False,
'text_prompts': {0: ["Mt. Halla's beautiful flowers, artstation matte "
'painting'],
100: ['This set of prompts start at frame 100',
'This prompt has weight five:5']},
'transformation_percent': [0.09],
'translation_x': '0: (0)',
'translation_y': '0: (0)',
'translation_z': '0: (10.0)',
'turbo_mode': False,
'turbo_preroll': 10,
'turbo_steps': '3',
'tv_scale': 0,
'use_horizontal_symmetry': False,
'use_vertical_symmetry': False,
'video_init_blend_mode': 'optical flow',
'video_init_check_consistency': False,
'video_init_clip_guidance_scale': 1000,
'video_init_cutn_batches': 4,
'video_init_file': 'init.mp4',
'video_init_flow_blend': 0.999,
'video_init_flow_warp': True,
'video_init_frames_scale': 15000,
'video_init_frames_skip_steps': '70%',
'video_init_path': '/workspace/projects/ekorpkit-book/disco-imagen/init_images/init.mp4',
'video_init_range_scale': 150,
'video_init_sat_scale': 300,
'video_init_seed_continuity': False,
'video_init_skip_steps': 50,
'video_init_steps': 100,
'video_init_tv_scale': 0.1,
'vr_eye_angle': 0.5,
'vr_ipd': 5.0,
'vr_mode': False,
'width_height': [768, 512],
'zoom': '0: (1), 10: (1.05)'}